# Loading in Packages
library(tidyverse)
library(ggtext)


## setting relative directory 
here::i_am("./notebook/Chapter_4_EEG.Rmd")

# loading in User-Defined Functions
source(here::here("my-scripts/R", "eeg_rope_pipeline.R"))
source(here::here("my-scripts/R", "eeg_sim_functions.R"))

## theme for the plots
source(here::here("my-scripts", "R", "project_themes.R"))

# set theme to project_theme
base_size = 12
theme_set(theme_project_light(base_size = base_size))

Chapter 4 of Project - Simulated EEG and ROPE Application

Overview of Chapter 1 - Multivarient Approach

Part One - Generate Raw EEG data

In this step, we use 1/F noise generated from a functions derived from Hank Steven’s work (original function is available on GitHub). After define to templates, one containing an effect and one without, we combine these with noise in order to represent a single trial. We then regenerate the noise again for each participant.

Defining Parameters to Create the Noise and Templates

.true_onset <- 450  ## Effect at 450ms
.stim_on <- 300     ## Stimulus is shown at 300ms
.freq <- 4         ## Defines resolution of stimulation, frequency of electrode (This is around 250 reading a second)
.max_time <- 800    ## End time for Epoch

.Xf <- seq(0, .max_time, .freq) ## start from zero and end at max time, going up in 4ms (freq)
Nf <- length(.Xf)   ## Number of timepoints

# Template one only contains zeros
.temp1 <- vector(mode = "numeric", length = Nf)  

# define magnitude of large effect
.magnit <- 2

# Generate ERP Peak
.erp <- dnorm(seq(-1.5, 1.5, length.out= 200/.freq), 0, 1)
.erp <- .erp - min(.erp)
.erp_small <- .erp / max(.erp)
.erp_large <- .magnit*.erp_small

# Getting length and storing them for later use
.l_erp <- length(.erp)
.l_pre_stim <- ceiling(.true_onset / .freq)

# Template 2 contains the ERP peak
temp1 <- c(rep(0, .l_pre_stim), .erp_small, rep(0, (Nf - .l_erp - .l_pre_stim)))
temp2 <- c(rep(0, .l_pre_stim), .erp_large, rep(0, (Nf - .l_erp - .l_pre_stim)))

Visualising the ERP Peak

tibble(x = .Xf - .stim_on, y = temp2) |> 
  ggplot(aes(x, y)) +
  geom_line(aes(colour = '2'), linewidth = 1.5)+
  geom_line(aes(y = temp1, colour = '1'), linewidth = 1.5)+
  annotate("rect", 
           xmin = .true_onset - .stim_on, 
           xmax = (.true_onset - .stim_on+200)-.freq, 
           ymin = 0, 
           ymax = .magnit,
           alpha = .5, fill = "grey80")+
  # annotate("text", label = "italic(H[0])", 
  #          x = -275, 
  #          y = .magnit * .97, 
  #          hjust = 0, vjust = 1,
  #          size = 10, parse = T)+
  # annotate("text", label = "italic(H[1])", 
  #          x = .true_onset - .stim_on + 15, 
  #          y = .magnit * .97,
  #          hjust = 0, vjust = 1,
  #          colour = "grey10",
  #          size = 10, parse = T)+
  theme_project_light(base_size = 12)+
  scale_y_continuous("", limits = c(0, 2))+
  scale_x_continuous("Time (ms)")+
  labs(title = "Three Simulated ERP Peak")+
  scale_colour_manual("Conditions", 
                      labels = c("1 & 2", "3"), 
                      values = c("darkblue", "forestgreen"), 
                      guide = guide_legend(override.aes = list(size = 10, linewidth = 3)))+
  theme(
    plot.title = element_markdown(size = 23),
    legend.title = element_text(size = 19),
    legend.text = element_text(size = 18)
    )

# ggsave(file = here::here("images", "ch4", "erp_peaks.png"), height = 8, width = 10, dpi = 360)

Combining Templates with Noise - Simulating 12 “experiments”, starting with 2 trials and doubling each time (until 4096 trials)

sample_size <-c(10, 25, 50, 100, 150, 200) # number of trials
.gsp <- 1 # gamma spectral power 
.outvar <- 1 # noise variance

alpha = 0.05 ## Standard Alpha Level for Significance Testing
seed = 1 # set.seed(1)
static_margins = c(0.3, 0.2, 0.1) # static margins for NUll ROPE

effect_time <- c(.true_onset - .stim_on, .true_onset + 200 - .freq - .stim_on)
time_cond <- expression((time < effect_time[1] | time > effect_time[2]))

## storing values in list, so make functions neater
eeg_pipeline_attr <- list(
  sample_size = sample_size, # Defines Sample Size/ Number of Trials 
  alpha = alpha, 
  num_time_points = Nf, 
  max_time = .max_time, 
  cond1_base = temp1, 
  cond2_base = temp2, 
  gamma_spec_power = .gsp, 
  noise_var = .outvar,   # Influence Noise output - Zero means no Noise
  stim_onset = .stim_on  # Time when Stimulus was "shown"
)

## No effect Present (Only using temp1)

eeg_pipeline_attr2 <- eeg_pipeline_attr
eeg_pipeline_attr2$cond2_base <- eeg_pipeline_attr$cond1_base 
## Effect Present in template 2 (temp2)
sim_df_1 <- sim_eeg_pipeline(eeg_pipeline_attr, seed = seed)

sim_df_2 <- sim_eeg_pipeline(eeg_pipeline_attr2, seed = seed)
## df contains p, t, ci and rope min and max values, will also be used to plot line of significance at bottom of graphs
rope_df_1 <- sim_df_1 |> 
  generate_rope_df(eeg_pipeline_attr, 
                   rope_func = get_eeg_nu_rope,
                   static_margins = static_margins, 
                   replication = F, 
                   sequential = F, 
                   null_rope = T)
## 
## A Null ROPE was used to Define Significance
## Warning in dplyr::inner_join(pivot_for_rope(add_rope(df, rope_func = rope_func, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
rope_df_2 <- sim_df_2 |> 
  generate_rope_df(eeg_pipeline_attr2, 
                   rope_func = get_eeg_nu_rope,
                   static_margins = static_margins, 
                   replication = F, 
                   sequential = F, 
                   null_rope = T)
## 
## A Null ROPE was used to Define Significance
## Warning in dplyr::inner_join(pivot_for_rope(add_rope(df, rope_func = rope_func, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
test_sample <- 25

sim_df_1 |> 
  filter(n_trial == test_sample) |> 
  pivot_for_mn(replication = F) |> 
  ggplot(aes(x = time, y = mn, colour = cond)) +
  geom_line(alpha = line_alpha, size = line_size)+
  geom_vline(xintercept = 0, colour = grey)+
  geom_hline(yintercept = 0, colour = grey)+
  scale_x_continuous("Time (ms)")+
  # MetBrewer::scale_color_met_c(name = "Java")+
  labs(title = "Defining Null ROPEs from ERP Values", 
       colour = "Condition")+
  scale_colour_manual(values = MetBrewer::met.brewer("Austria", 3),
                      labels = c("1", "2", "3"), 
                      guide = guide_legend(override.aes = list(linewidth = 2, shape = NA), order = 1))+
  scale_y_continuous("", breaks = c(-1, 0, 1, 2))+
  geom_ribbon(data = subset(sim_df_1, n_trial == test_sample & (!eval(time_cond))), 
              aes(x = time, ymin = mn_cond1, ymax = mn_cond2, fill = '1'), 
              inherit.aes = F, outline.type = "full", alpha = .4)+
  geom_ribbon(data = subset(sim_df_1, n_trial == test_sample & (time <= 0)), 
              aes(x = time, ymin = mn_cond1, ymax = mn_cond3, fill = '2'), 
              inherit.aes = F, outline.type = "full", alpha = .4)+
  annotate("rect", xmin = -.stim_on, xmax = 0, ymin = -1, ymax = 2.1, alpha = .2)+
  annotate("rect", xmin = effect_time[1], xmax = effect_time[2], ymin = -1, ymax = 2.1, alpha = .2)+
  scale_fill_manual("Null ROPE", labels = c("Within ROI", "Before Stimulus"), values = c("red4", "purple4"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

  # geom_ribbon(data = rope_df_1 |> filter(rope %in% "control", n_trial == test_sample, comparison == 12),
  #             aes(x = time, ymin = min, ymax = max, group = rope, fill = '1'), 
  #             inherit.aes = F,  outline.type = "full", alpha = .3)+
  # geom_ribbon(data = rope_df_1 |> filter(rope %in% "prestim", n_trial == test_sample, comparison == 12),
  #             aes(x = time, ymin = min, ymax = max, group = rope, fill = '2'), 
  #             inherit.aes = F,  outline.type = "full", alpha = .3)

ggsave(file = here::here("images/ch4/rope_demo.png"), height = 8, width = 10, dpi = 360)
.p1 <- sim_df_1 |> 
  plot_sim_base(rope_df = rope_df_1) 

.p2 <- sim_df_2 |> 
  plot_sim_base(rope_df = rope_df_2)

.p1
## [[1]]

.p2
## [[1]]

# ggsave(plot = p1, filename = here::here("images", "ch4_rope_eff_plain.png"), width = 10, height = 8, dpi = 360)
# ggsave(plot = p1, filename = here::here("images", "ch4_rope_eff_plain.png"), width = 10, height = 8, dpi = 360)

Part Two - Define Nulls ROPEs

Experiments with an Effect

We are going to define two ROPE (Regions of Practical Equivalence) using the data to represent the null and alternative hypothesis. The Null ROPE will be defined by the 90% quantile range of the mean pre-stimulus electrical activity for both groups and the Alternative ROPE will be defined by the mean differences of only the significant p-values. It may be better specify the range of time points a priori, however, I will use two different ROPEs to represent the alternative hypthesis, one defined from raw p-values and the other by bonferroni corrected p-values. This is provide a more conservative definition of an effect, as some results pre-stimulus can be significant and those false positive should not influence the alternative ROPE.

Once the ROPE is created for an “experiment”, it will superimpose the data of the next experiment, to see how the new data fits to the previous study. If the Confidence intervals are perfectly contained in a ROPE, then we can consider that as support for rejecting and maintaining the null hypothesis. For example, if the Confidence interval is inside the Null ROPE, even if the difference is statistically significant, this should likely be interpreted as evidence for keeping the null. Likewise, if the alternative ROPE covers the majority of the ERP amplitude, then that would be evidence of a replication of the underlying effect and thus, evidence to reject the null.

.sim_1 <- plot_rope(.p1, rope_df_1, null_rope = T, change_subtitle = F)

.sim_1
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

As sample size increases the effect becomes more clear and obvious, and the background noise appears to cancel itself out. This in turn increases the size of the alternative ROPEs, both ROPE defined from raw p-values and bonferroni corrected are wided at the larger sample size. However, raw p-values cause the ROPE to cover the entire range of values, including values where an effect is unlikely. Moreover, it appears only the Bonferroni defined ROPE maintains a gap between the two ROPEs, the Null and Alternative, whch would allow easier interpretation. For, if the ROPEs overlap, and the Confidence interval stretches across both, then it is unclear which ROPE it supports, preventing us to make a clear preference of a hypothesis.

It is worth noting that the peak of each of the effects appears to go above the ROPE, as the quantile range which is used to define these ROPEs would need to contain values larger than what is observed. It may not be suitable to use a ROPE to investigate the maximum values, instead it should likely be used to prevent false positives and ensure the observed difference is outside an interval which represents background noise.

Experiments without an Effect

To see how a ROPE can prevent false positives, we ran the EEG simulation but with no underlying effect, therefore any statistically significant results are false positives.

.sim_2 <- plot_rope(.p2, rope_df_2, null_rope = T)

.sim_2
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

Replication of ERP peaks

rep_df_1 <- sim_repli_eeg_pipeline(pipeline_attr = eeg_pipeline_attr, seed = seed, total_experiments = 2)
rep_df_2 <- sim_repli_eeg_pipeline(pipeline_attr = eeg_pipeline_attr2, seed = seed, total_experiments = 2)
rep_rope_1 <- rep_df_1 |>
  generate_rope_df(pipeline_attr = eeg_pipeline_attr, 
                   rope_func = get_eeg_nu_rope,
                   static_margins = static_margins, 
                   replication = T, 
                   sequential = F, 
                   null_rope = T)
## 
## A Null ROPE was used to Define Significance
## Warning in dplyr::inner_join(pivot_for_rope(add_rope(df, rope_func = rope_func, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
rep_rope_2 <- rep_df_2 |>
  generate_rope_df(pipeline_attr = eeg_pipeline_attr2, 
                   rope_func = get_eeg_nu_rope,
                   static_margins = static_margins, 
                   replication = T, 
                   sequential = F, 
                   null_rope = T)
## 
## A Null ROPE was used to Define Significance
## Warning in dplyr::inner_join(pivot_for_rope(add_rope(df, rope_func = rope_func, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
.rep_p1 <- rep_df_1 |>
  plot_rep_base(rep_rope_1)

.rep_p2 <- rep_df_2 |>
  plot_rep_base(rep_rope_2)
.rep_p1
## [[1]]

.rep_p2
## [[1]]

.rep_1 <- plot_rope(.rep_p1, rep_rope_1, null_rope = T, replication = T)
.rep_1
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

.rep_2 <- plot_rope(.rep_p2, rep_rope_2, null_rope = T, replication = T)
.rep_2
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

Alternative ROPE

rope_df_1a <- sim_df_1 |> 
  generate_rope_df(eeg_pipeline_attr, 
                   rope_func = get_eeg_alter_rope, 
                   replication = F, 
                   sequential = F, 
                   null_rope = F,
                   replicate.effect = T)
## 
## An Alternative ROPE was used to Define Significance
rope_df_2a <- sim_df_2 |> 
  generate_rope_df(eeg_pipeline_attr2, 
                   rope_func = get_eeg_alter_rope,
                   replication = F, 
                   sequential = F, 
                   null_rope = F, 
                   replicate.effect = T)
## 
## An Alternative ROPE was used to Define Significance
## adding alternative rope to previous simulations
.sim_1a <- plot_rope(.sim_1, df = rope_df_1a, null_rope = F, change_subtitle = T, replication = F)
.sim_1a
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## adding alternative rope to previous simulations
.sim_2a <- plot_rope(.sim_2, df = rope_df_2a, null_rope = F, change_subtitle = T)
.sim_2a
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

Applying Alternative ROPE to replications

rep_rope_1a <- rep_df_1 |>
  generate_rope_df(pipeline_attr = eeg_pipeline_attr, 
                   rope_func = get_eeg_alter_rope,
                   replication = T, 
                   sequential = F, 
                   null_rope = F,
                   replicate.effect = T)
## 
## An Alternative ROPE was used to Define Significance
rep_rope_2a <- rep_df_2 |>
  generate_rope_df(pipeline_attr = eeg_pipeline_attr2, 
                   rope_func = get_eeg_alter_rope,
                   replication = T, 
                   sequential = F, 
                   null_rope = F, 
                   replicate.effect = T)
## 
## An Alternative ROPE was used to Define Significance
.rep_1a <- plot_rope(.rep_1, df = rep_rope_1a, null_rope = F, change_subtitle = T, replication = T)
.rep_1a
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

.rep_2a <- plot_rope(.rep_2, df = rep_rope_2a, null_rope = F, change_subtitle = T, replication = T)
.rep_2a
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

Dynamic Alpha Rule - min(alpha, beta)

source(here::here("my-scripts/R/dyn_alpha_functions.R"))

## get new alpha level
df_dyn_1 <- rep_df_1 |> 
  get_new_alpha(method = "min.ab")

## compare new alpha to p-value, used to plot significant lines
dyn_stats_1 <- rep_df_1 |> 
  get_sig_lines(dyn_df = df_dyn_1)

.rep_dyn_p1 <- .rep_p1[[1]] |> ## use the rep_p1 plot as a base, then add the new sig lines over it
  plot_dyn_alpha(dyn_df = dyn_stats_1, base_size = 12, method = "min.ab")

.rep_dyn_p1
## [[1]]

Dynamic Alpha Rule - Adaptive Alpha Formula

Perez and Pericchi’s (2014) formula in the {NetworkToolbox} Package

library(NetworkToolbox)

df_dyn_2 <- rep_df_1 |> 
  get_new_alpha(method = "adapt.a")

dyn_stats_2 <- rep_df_1 |> 
    get_sig_lines(dyn_df = df_dyn_2)


.rep_dyn_p2 <- .rep_p1[[1]] |> ## use the rep_p1 plot as a base, then add the new sig lines over it
  plot_dyn_alpha(dyn_df = dyn_stats_2, base_size = 12, method = "adapt.a")

.rep_dyn_p2[[1]]

Bayes Factors (Testing/ Not Final)

sim_bf_1 <- sim_eeg_pipeline(eeg_pipeline_attr, seed = seed, get_bf = T) |> 
  mutate(
    bf_sig = if_else(bf_13 > 3, 0, NA),
    bf_null_sig = if_else((1/bf_13) > 3, 0, NA),
    )

rep_bf_1 <- sim_repli_eeg_pipeline(eeg_pipeline_attr, seed = seed, total_experiments = 2, get_bf = T)|> 
  mutate(
    bf_sig = if_else(bf_13 > 3, 0, NA),
    bf_null_sig = if_else((1/bf_13) > 3, 0, NA),
    )
.p_bf1 <- sim_bf_1 |> 
  plot_sim_base()
  
.bf_p1 <- .p_bf1[[1]] + 
  geom_point(data = sim_bf_1, aes(x = time, y = bf_sig -.9, fill = '1' ), shape = 22, stroke = NA, inherit.aes = F)+
  geom_point(data = sim_bf_1, aes(x = time, y = bf_null_sig - 1, fill = '2'), shape = 22, stroke = NA, inherit.aes = F)+
  scale_fill_manual("Bayes Factor", labels = c("Support Alternative", "Support Null"), values = fill_colours,
                    guide = guide_legend(override.aes = list(size = 5, colour = NA)))+
  labs(subtitle = "Using Bayesian Model Comparison - <b>Bayes Factors</b>")+
  theme_project_light(base_size = base_size)

.bf_p1
.rep_p1_bf <- rep_bf_1 |> 
  plot_rep_base(replication = T)
  
.rep_bf_p1 <- .rep_p1_bf[[1]] + 
  geom_point(data = sim_bf_1, aes(x = time, y = bf_sig -.8, fill = '1' ), shape = 22, stroke = NA, inherit.aes = F)+
  geom_point(data = sim_bf_1, aes(x = time, y = bf_null_sig - 1, fill = '2'), shape = 22, stroke = NA, inherit.aes = F)+
  scale_fill_manual("Bayes Factor", labels = c("Supports Alternative", "Supports Null"), values = fill_colours,
                    guide = guide_legend(override.aes = list(size = 5, colour = NA)))+
  labs(subtitle = "Using Bayesian Model Comparison - <b>Bayes Factors</b>")+
  theme_project_light(base_size = base_size)

.rep_bf_p1

Bayesian Parametre Estimation and HDI (Testing/Not Final)

# sim_df_1

Just like the ROPEs for when there is an underlying effect, the ROPE constructed from raw p-values appears to be less precise and helpful. Here we can see how False positves influence the ROPE whilst no results were significant after bonferroni correction.

Not Currently Up-to-date - May be Removed

Evaluating the False Positives with respect to no ROPE, and the different types of ROPEs

The use of a ROPE should reduce the number of false positives, as if the Confidence interval of a statistically significant point is not completely outside the Null ROPE then no conclusions can be made.

It is probably worth replicating this pipeline (the above process) several times to assess behaviour in the long run - as consistency is difficult to assess from a few trials. These following graphs are influenced by randomness, as the noise is randomly generated, so repeating this pipeline should reduce this unwanted influence.

seeds = 1:1000 # the seeds that were used in the 1000 repetitions of the Pipeline

## Reading the Pipeline Simulations - 1000 repetitions
eval_rope_1 <- read_csv(here::here("sim_data/ch4_eeg_sims/large_sim_rope_eval_df.csv"))
sim_rope_2 <- read_csv(here::here("sim_data","ch4_eeg_sims", "sim_egg_pipeline_no_eff.csv"))

## Evaluating the ROPEs of Number of False Positives
# eval_rope_1 <- sim_rope_1 |> 
#   eval_rope(alpha = 0.05, effect_time_vector = effect_time)

# eval_rope_2 <- sim_rope_2 |> 
#   eval_rope(alpha = 0.05, effect_time_vector = effect_time)
## Functions to create graphs
source(here::here("my-scripts", "R", "eeg_rope_graphs.R"))
.eval_rope_g1 <- eval_rope_1 |> 
  filter(!rope %in% c("controlt", "nonsig")) |> 
  fp_graphs(effect = T, var = rope)
## [[1]]

## 
## [[2]]

# ggsave(plot = .eval_rope_g1[[1]], file = here::here("images/ch5", "fp_ropes.png"), width = 13, height = 10, dpi = 360)
# .eval_rope_g2 <- eval_rope_2 |> 
#   fp_graphs(effect = F)

Raw p-values are going to generate false positives, which is expected after doing around 400 t-tests. The ROPE does seem to catch some of these, as if the Confidence interval overlaps with the ROPE, then we don’t conclude it is a significant difference and without a decision. However, bonferroni corrected p-values seems to be prefect as controlling for false positives, which is probably because of the multivariate approach used in the analysis. IF the EEG was analysed with the mean amplitude then bonferroni correction will start to resemble the raw p values, and hopefully the ROPE will still be effect.

Evaluating the True Positives with respect to no ROPE, and the different types of ROPEs

Whilst minimising False positives is desirable, we can achieve a false positive rate of zero by sacrificing our ability to detect meaningful statistical differences. This is the classic issue of Sensitivity vs Specificity. For example, if we set the margins of a ROPE to -10 and +10, we would have no False Positive because every value is inside that ROPE, so we would have no True positives.

.eval_rope_g3 <- eval_rope_1 |> 
    filter(!rope %in% c("controlt", "nonsig")) |> 
    tp_graphs(var = rope)
## [[1]]

## 
## [[2]]

# ggsave(plot = .eval_rope_g3[[2]], file = here::here("images/ch5", "tp_ropes.png"), width = 13, height = 10, dpi = 360)
# auc_df <- eval_rope_df |>
#   inner_join(tp_df_1, by = c("n_trial", "rope"))

## Not as neat as I expected - maybe we need more values or the data is not properly formatted?
# eval_rope_1|>
#   filter(n_trial != 10) |>
#   ggplot(aes(x = fpr, y = tpr, colour = rope))+
#   geom_line()+
#   theme_minimal()

Next steps: Repeat pipeline to get smooth curves and long-term behaviour of the various methods Repeat pipeline but analyse data using mean amplitude instead of t-tests at every time point Add references to Laken’s and Kruschke’s work, tidy up unprofessional language.

Evaluating dynamic alpha level

eval_min_1 <- read_csv(here::here("sim_data/ch4_eeg_sims/large_sim_min_eval_df.csv"))
## Rows: 12 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): experiment
## dbl (29): n_trial, fp_2, tp_2, fn_2, tn_2, fp_r, tp_r, fn_r, tn_r, fp_b, tp_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
eval_adapt_1 <- read_csv(here::here("sim_data/ch4_eeg_sims/large_sim_adapt_eval_df.csv"))
## Rows: 12 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): experiment
## dbl (29): n_trial, fp_2, tp_2, fn_2, tn_2, fp_r, tp_r, fn_r, tn_r, fp_b, tp_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dyn_alpha_p1 <- plot_eval_dyn_alpha(min_df = eval_min_1, adapt_df = eval_adapt_1, rate = "fpr")
dyn_alpha_p1
## [[1]]
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis

## 
## [[2]]

# ggsave(plot = dyn_alpha_p1[[2]], file = here::here("images/ch5", "fp_dyn_alpha.png"), width = 13, height = 10, dpi = 360)
dyn_alpha_p2 <- plot_eval_dyn_alpha(min_df = eval_min_1, adapt_df = eval_adapt_1, rate = "tpr")
dyn_alpha_p2
## [[1]]
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis

## 
## [[2]]

# ggsave(plot = dyn_alpha_p2[[2]], file = here::here("images/ch5", "tp_dyn_alpha.png"), width = 13, height = 10, dpi = 360)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid_p1 <- grid.arrange(.eval_rope_g1[[1]] + labs(tag = "A"), 
             .eval_rope_g3[[2]] + labs(tag = "B"), 
             dyn_alpha_p1[[2]] + labs(tag = "C"), 
             dyn_alpha_p2[[2]] + labs(tag = "D"), 
             nrow = 2)

# ggsave(plot = grid_p1, file = here::here("images/ch5", "fp_tp_grid.png"), width = 13, height = 10, dpi = 360)

Overview of Chapter 2 - Mean Amplitude Approach

alpha = 0.05 ## Standard Alpha Level for Significance Testing
seed = 1 # set.seed(1)
static_margins = c(0.3, 0.2, 0.1) # static margins for NUll ROPE

## defining region where peak occurs - used to find mean amplitude
time_cond <- expression((time < effect_time[1] | time > effect_time[2]))
## Process to proform anovas on each set of n_trial for both conditions
peak_anova_df_1 <- sim_df_1 |> 
  select(1:4) |>  # removing columns that are used for Multivarient Approach
  mutate(in_effect = if_else(!eval(time_cond), 1, 0)) |> 
  pivot_longer(cols = starts_with("mn_cond"), names_to = "cond", values_to = "value") |> 
  mutate(cond = as.factor(cond)) |> 
  filter(!eval(time_cond)) |> 
  nest(data = -n_trial) |> 
  mutate(model = map(.x = data, ~ anova(lm(value ~ cond, .))), 
         tidy = map(model, broom::tidy)) |> 
  select(n_trial, tidy) |> 
  unnest(cols = c(tidy))
# 
# peak_df_1 <- sim_df_1 |> 
#   select(1:5) |>  # removing columns that are used for Multivarient Approach
#   mutate(in_effect = if_else(!eval(time_cond), 1, 0)) |> 
#   group_by(n_trial, in_effect) |> 
#   summarise(
#     mn_peak = mean(mn_diff, na.rm = T),
#     mn_peak_t1 = mean(mn_diff, na.rm = T, trim = .2),
#     mn_peak_t1 = mean(mn_diff, na.rm = T, trim = .1),
#     mn_peak_t5 = mean(mn_diff, na.rm = T, trim = .05),
#     sd_peak = sd(mn_diff, na.rm = T), 
#     .groups = "drop"
#     ) |> 
#   pivot_longer(cols = contains("mn_"), 
#                names_to = "mean_type", 
#                values_to = "value")
  
peak_df_1 <- sim_df_1 |> 
  select(1:4) |>  
  mutate(in_effect = if_else(!eval(time_cond), 1, 0)) |>
    dplyr::group_by(n_trial, in_effect) |> 
    dplyr::summarise(
      mn_peak_cond1 = mean(mn_cond1),
      mn_peak_cond2 = mean(mn_cond2),
      mn_diff = mn_peak_cond2 - mn_peak_cond1,
      p = t.test(mn_cond1, mn_cond2)[["p.value"]],
      ci_l = t.test(mn_cond2, mn_cond1, conf.level = 1 - alpha, alternative = "two.sided")[["conf.int"]][1],
      ci_u = t.test(mn_cond2, mn_cond1, conf.level = 1 - alpha, alternative = "two.sided")[["conf.int"]][2],
      .groups = "drop"
    ) |> 
    dplyr::group_by(n_trial) |>
    dplyr::mutate(raw_sig = dplyr::if_else(p < alpha, min(ci_l), NA)) |>  # just so the graph has a line at zero to show significance
    dplyr::ungroup()



#   
# sim_df_2 |> 
#   select(1:5) # removing columns that are used for Multivarient Approach
#   
peak_df_1
## # A tibble: 12 × 9
##    n_trial in_effect mn_peak_cond1 mn_peak_cond2  mn_diff           p     ci_l
##      <dbl>     <dbl>         <dbl>         <dbl>    <dbl>       <dbl>    <dbl>
##  1      10         0      -0.0991        0.0409   0.140   0.000382     0.0636 
##  2      10         1       0.899         0.465   -0.434   0.000000553 -0.594  
##  3      25         0      -0.0423        0.0156   0.0579  0.00291      0.0200 
##  4      25         1       0.723         0.543   -0.180   0.0109      -0.317  
##  5      50         0      -0.00940      -0.0284  -0.0190  0.0842      -0.0406 
##  6      50         1       0.621         0.680    0.0590  0.373       -0.0719 
##  7     100         0       0.00175       0.0115   0.00974 0.299       -0.00870
##  8     100         1       0.586         0.556   -0.0302  0.640       -0.158  
##  9     150         0       0.00403       0.0214   0.0174  0.0248       0.00221
## 10     150         1       0.579         0.525   -0.0538  0.409       -0.183  
## 11     200         0       0.00558       0.00680  0.00122 0.848       -0.0112 
## 12     200         1       0.574         0.570   -0.00377 0.955       -0.136  
## # ℹ 2 more variables: ci_u <dbl>, raw_sig <dbl>
peak_df_1 |> 
  filter(in_effect == 1) |> 
  ggplot(aes(n_trial, group = n_trial))+
  geom_point(aes(y = mn_peak_cond1), colour = line_colours[1], shape = 15)+
  geom_point(aes(y = mn_peak_cond2), colour = line_colours[2], shape = 15)+
  geom_hline(aes(yintercept = 0), size = 1.05)+
  ggtitle("Mean Amplitude of ERP Peaks in two Conditions")+ 
  labs(subtitle = glue::glue("Conditions:, 
                            <b><span style='color:{line_colours[1]};'>Control Group </span></b>and 
                            <b><span style='color:{line_colours[2]};'>Alternative</span></b>"))+
  scale_y_continuous("Mean Amplitude (mV)")+
  scale_x_continuous("Sample Size")
  # geom_point(aes(y = mn_diff), colour = "grey", shape = 15)
  # geom_errorbar(aes(ymin = mn_peak_cond2 - ci_l, ymax = mn_diff + ci_u), colour = line_colours[2])

# ggsave(filename = here::here("images", "ch4_mean_peak_1.png"), width = 10, height = 8, dpi = 360)

peak_df_1 |> 
  filter(in_effect == 1) |> 
  ggplot(aes(n_trial, group = n_trial))+
  geom_point(aes(y = mn_diff), colour = line_colours[1], shape = 15)+
  geom_errorbar(aes(ymin = mn_diff - abs(ci_l), ymax = mn_diff + abs(ci_u)), colour = line_colours[1], width = 5)+
  # geom_point(aes(y = mn_peak_cond2), colour = line_colours[2], shape = 15)+
  geom_hline(aes(yintercept = 0), size = 1.05)+
  ggtitle("Mean Difference in Amplitude of ERP Peaks in Various Sample Sizes")+ 
  scale_y_continuous("Mean Amplitude (mV)")+
  scale_x_continuous("Sample Size")+
  labs(subtitle = "Error Bars Represent 95% Confidence Intervals of Mean Difference")

# ggsave(filename = here::here("images", "ch4_mean_peak_2.png"), width = 10, height = 8, dpi = 360)